# -*- coding:utf-8 -*-

__author__ = 'ghostviper'

# Per-field extraction rules. Every field is located with an XPath expression
# and shares the same 'in' / 'out' settings, so only the field name and its
# XPath differ between entries.
# NOTE(review): 'in' and 'out' are presumably processor names resolved by the
# crawler engine -- confirm against the consumer of this module.
extract_rules = {
    field: {
        'method': 'xpath',
        'extract_rules': (xpath,),
        'in': ('get_text', 'unicode'),
        'out': 'takefirst',
    }
    for field, xpath in (
        ('title', '//*[@class="list_left"]/h1'),
        ('body', '//*[@class="detailbg"]'),
        ('pub_time', '//*[@id="pubtime_baidu"]'),
    )
}

# Seed URL list for the crawl; kept as a list so more entries can be appended.
start_urls = ["http://xw.kunming.cn/today.htm"]

# Complete crawler definition: run options under 'meta_info', plus this
# module's extraction rules and seed URLs (referenced, not copied).
crawler_rule = {
    'meta_info': {
        'crawler_name': 'test_crawler',
        'follow_link': True,
        # NOTE(review): 0 presumably means "no limit" for the three limit_*
        # options -- confirm against the crawler engine.
        'limit_depths': 0,
        'limit_items': 0,
        'limit_requests': 0,
        # One-element tuple holding a single XPath expression.
        'restrict_xpath': ('//*[@class="listbg"]',),
    },
    'extract_rules': extract_rules,
    'start_urls': start_urls,
}






